
Conversation

@bharatr21
Contributor

@bharatr21 bharatr21 commented Dec 1, 2025

Description

closes #1282

Replace isinstance(obj, T) checks with type(obj) is T to optimize cuda.core.launch()
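
For context, here is a minimal pure-Python sketch of the semantic difference between the two checks (illustration only, not part of the patch): isinstance() accepts subclasses, while an exact type(obj) is T check does not, which is also why the fallback discussed below exists. In Cython, the is comparison on the type object compiles down to essentially a pointer comparison, avoiding the isinstance() call overhead.

import ctypes

class MyInt32(ctypes.c_int32):
    # Hypothetical user-defined subclass, used only for illustration.
    pass

x = ctypes.c_int32(7)
y = MyInt32(7)

# isinstance() matches the exact type and any subclass.
assert isinstance(x, ctypes.c_int32)
assert isinstance(y, ctypes.c_int32)

# An exact type check matches only ctypes.c_int32 itself; a subclass
# falls through to whatever fallback handling exists.
assert type(x) is ctypes.c_int32
assert type(y) is not ctypes.c_int32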


Additional Notes


Since the original issue requested profiling, I wrote a benchmarking script in Cython to measure the speedup of using type() in place of isinstance() checks; it shows an ~5x speedup.
I'd appreciate some guidance on whether I've done the profiling right.

I created a file, benchmark_isinstance_cython.pyx:

from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.stdint cimport (intptr_t,
                         int8_t, int16_t, int32_t, int64_t,
                         uint8_t, uint16_t, uint32_t, uint64_t)
from libcpp cimport bool as cpp_bool
from libcpp.complex cimport complex as cpp_complex
from libcpp.vector cimport vector

import ctypes
import numpy
import time
from statistics import mean, stdev


ctypedef cpp_complex.complex[float] cpp_single_complex
ctypedef cpp_complex.complex[double] cpp_double_complex

# Cache type objects
cdef object ctypes_bool = ctypes.c_bool
cdef object ctypes_int8 = ctypes.c_int8
cdef object ctypes_int16 = ctypes.c_int16
cdef object ctypes_int32 = ctypes.c_int32
cdef object ctypes_int64 = ctypes.c_int64
cdef object ctypes_uint8 = ctypes.c_uint8
cdef object ctypes_uint16 = ctypes.c_uint16
cdef object ctypes_uint32 = ctypes.c_uint32
cdef object ctypes_uint64 = ctypes.c_uint64
cdef object ctypes_float = ctypes.c_float
cdef object ctypes_double = ctypes.c_double
cdef object numpy_bool = numpy.bool_
cdef object numpy_int8 = numpy.int8
cdef object numpy_int16 = numpy.int16
cdef object numpy_int32 = numpy.int32
cdef object numpy_int64 = numpy.int64
cdef object numpy_uint8 = numpy.uint8
cdef object numpy_uint16 = numpy.uint16
cdef object numpy_uint32 = numpy.uint32
cdef object numpy_uint64 = numpy.uint64
cdef object numpy_float16 = numpy.float16
cdef object numpy_float32 = numpy.float32
cdef object numpy_float64 = numpy.float64
cdef object numpy_complex64 = numpy.complex64
cdef object numpy_complex128 = numpy.complex128

# Limitation due to cython/cython#534
ctypedef void* voidptr


# ============================================================================
# Version 1: Current implementation using isinstance()
# ============================================================================

cdef inline int prepare_ctypes_arg_isinstance(
       vector[void*]& data,
       vector[void*]& data_addresses,
       arg,
       const size_t idx) except -1:
   cdef void* ptr

   if isinstance(arg, ctypes_bool):
       ptr = PyMem_Malloc(sizeof(cpp_bool))
       (<cpp_bool*>ptr)[0] = <cpp_bool>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_int8):
       ptr = PyMem_Malloc(sizeof(int8_t))
       (<int8_t*>ptr)[0] = <int8_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_int16):
       ptr = PyMem_Malloc(sizeof(int16_t))
       (<int16_t*>ptr)[0] = <int16_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_int32):
       ptr = PyMem_Malloc(sizeof(int32_t))
       (<int32_t*>ptr)[0] = <int32_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_int64):
       ptr = PyMem_Malloc(sizeof(int64_t))
       (<int64_t*>ptr)[0] = <int64_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_uint8):
       ptr = PyMem_Malloc(sizeof(uint8_t))
       (<uint8_t*>ptr)[0] = <uint8_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_uint16):
       ptr = PyMem_Malloc(sizeof(uint16_t))
       (<uint16_t*>ptr)[0] = <uint16_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_uint32):
       ptr = PyMem_Malloc(sizeof(uint32_t))
       (<uint32_t*>ptr)[0] = <uint32_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_uint64):
       ptr = PyMem_Malloc(sizeof(uint64_t))
       (<uint64_t*>ptr)[0] = <uint64_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_float):
       ptr = PyMem_Malloc(sizeof(float))
       (<float*>ptr)[0] = <float>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, ctypes_double):
       ptr = PyMem_Malloc(sizeof(double))
       (<double*>ptr)[0] = <double>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   else:
       return 1


cdef inline int prepare_numpy_arg_isinstance(
       vector[void*]& data,
       vector[void*]& data_addresses,
       arg,
       const size_t idx) except -1:
   cdef void* ptr

   if isinstance(arg, numpy_bool):
       ptr = PyMem_Malloc(sizeof(cpp_bool))
       (<cpp_bool*>ptr)[0] = <cpp_bool>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_int8):
       ptr = PyMem_Malloc(sizeof(int8_t))
       (<int8_t*>ptr)[0] = <int8_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_int16):
       ptr = PyMem_Malloc(sizeof(int16_t))
       (<int16_t*>ptr)[0] = <int16_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_int32):
       ptr = PyMem_Malloc(sizeof(int32_t))
       (<int32_t*>ptr)[0] = <int32_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_int64):
       ptr = PyMem_Malloc(sizeof(int64_t))
       (<int64_t*>ptr)[0] = <int64_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_uint8):
       ptr = PyMem_Malloc(sizeof(uint8_t))
       (<uint8_t*>ptr)[0] = <uint8_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_uint16):
       ptr = PyMem_Malloc(sizeof(uint16_t))
       (<uint16_t*>ptr)[0] = <uint16_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_uint32):
       ptr = PyMem_Malloc(sizeof(uint32_t))
       (<uint32_t*>ptr)[0] = <uint32_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_uint64):
       ptr = PyMem_Malloc(sizeof(uint64_t))
       (<uint64_t*>ptr)[0] = <uint64_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_float32):
       ptr = PyMem_Malloc(sizeof(float))
       (<float*>ptr)[0] = <float>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_float64):
       ptr = PyMem_Malloc(sizeof(double))
       (<double*>ptr)[0] = <double>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_complex64):
       ptr = PyMem_Malloc(sizeof(cpp_single_complex))
       (<cpp_single_complex*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif isinstance(arg, numpy_complex128):
       ptr = PyMem_Malloc(sizeof(cpp_double_complex))
       (<cpp_double_complex*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   else:
       return 1


# ============================================================================
# Version 2: Optimized implementation using type() is with isinstance fallback
# ============================================================================

cdef inline int prepare_ctypes_arg_type_is(
       vector[void*]& data,
       vector[void*]& data_addresses,
       arg,
       const size_t idx) except -1:
   cdef void* ptr
   cdef object arg_type = type(arg)

   if arg_type is ctypes_bool:
       ptr = PyMem_Malloc(sizeof(cpp_bool))
       (<cpp_bool*>ptr)[0] = <cpp_bool>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_int8:
       ptr = PyMem_Malloc(sizeof(int8_t))
       (<int8_t*>ptr)[0] = <int8_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_int16:
       ptr = PyMem_Malloc(sizeof(int16_t))
       (<int16_t*>ptr)[0] = <int16_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_int32:
       ptr = PyMem_Malloc(sizeof(int32_t))
       (<int32_t*>ptr)[0] = <int32_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_int64:
       ptr = PyMem_Malloc(sizeof(int64_t))
       (<int64_t*>ptr)[0] = <int64_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_uint8:
       ptr = PyMem_Malloc(sizeof(uint8_t))
       (<uint8_t*>ptr)[0] = <uint8_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_uint16:
       ptr = PyMem_Malloc(sizeof(uint16_t))
       (<uint16_t*>ptr)[0] = <uint16_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_uint32:
       ptr = PyMem_Malloc(sizeof(uint32_t))
       (<uint32_t*>ptr)[0] = <uint32_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_uint64:
       ptr = PyMem_Malloc(sizeof(uint64_t))
       (<uint64_t*>ptr)[0] = <uint64_t>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_float:
       ptr = PyMem_Malloc(sizeof(float))
       (<float*>ptr)[0] = <float>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is ctypes_double:
       ptr = PyMem_Malloc(sizeof(double))
       (<double*>ptr)[0] = <double>(arg.value)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   else:
       # Backward compatibility: fallback to isinstance for subclasses
       if isinstance(arg, ctypes_bool):
           ptr = PyMem_Malloc(sizeof(cpp_bool))
           (<cpp_bool*>ptr)[0] = <cpp_bool>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_int8):
           ptr = PyMem_Malloc(sizeof(int8_t))
           (<int8_t*>ptr)[0] = <int8_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_int16):
           ptr = PyMem_Malloc(sizeof(int16_t))
           (<int16_t*>ptr)[0] = <int16_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_int32):
           ptr = PyMem_Malloc(sizeof(int32_t))
           (<int32_t*>ptr)[0] = <int32_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_int64):
           ptr = PyMem_Malloc(sizeof(int64_t))
           (<int64_t*>ptr)[0] = <int64_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_uint8):
           ptr = PyMem_Malloc(sizeof(uint8_t))
           (<uint8_t*>ptr)[0] = <uint8_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_uint16):
           ptr = PyMem_Malloc(sizeof(uint16_t))
           (<uint16_t*>ptr)[0] = <uint16_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_uint32):
           ptr = PyMem_Malloc(sizeof(uint32_t))
           (<uint32_t*>ptr)[0] = <uint32_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_uint64):
           ptr = PyMem_Malloc(sizeof(uint64_t))
           (<uint64_t*>ptr)[0] = <uint64_t>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_float):
           ptr = PyMem_Malloc(sizeof(float))
           (<float*>ptr)[0] = <float>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, ctypes_double):
           ptr = PyMem_Malloc(sizeof(double))
           (<double*>ptr)[0] = <double>(arg.value)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       else:
           return 1


cdef inline int prepare_numpy_arg_type_is(
       vector[void*]& data,
       vector[void*]& data_addresses,
       arg,
       const size_t idx) except -1:
   cdef void* ptr
   cdef object arg_type = type(arg)

   if arg_type is numpy_bool:
       ptr = PyMem_Malloc(sizeof(cpp_bool))
       (<cpp_bool*>ptr)[0] = <cpp_bool>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_int8:
       ptr = PyMem_Malloc(sizeof(int8_t))
       (<int8_t*>ptr)[0] = <int8_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_int16:
       ptr = PyMem_Malloc(sizeof(int16_t))
       (<int16_t*>ptr)[0] = <int16_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_int32:
       ptr = PyMem_Malloc(sizeof(int32_t))
       (<int32_t*>ptr)[0] = <int32_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_int64:
       ptr = PyMem_Malloc(sizeof(int64_t))
       (<int64_t*>ptr)[0] = <int64_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_uint8:
       ptr = PyMem_Malloc(sizeof(uint8_t))
       (<uint8_t*>ptr)[0] = <uint8_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_uint16:
       ptr = PyMem_Malloc(sizeof(uint16_t))
       (<uint16_t*>ptr)[0] = <uint16_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_uint32:
       ptr = PyMem_Malloc(sizeof(uint32_t))
       (<uint32_t*>ptr)[0] = <uint32_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_uint64:
       ptr = PyMem_Malloc(sizeof(uint64_t))
       (<uint64_t*>ptr)[0] = <uint64_t>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_float32:
       ptr = PyMem_Malloc(sizeof(float))
       (<float*>ptr)[0] = <float>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_float64:
       ptr = PyMem_Malloc(sizeof(double))
       (<double*>ptr)[0] = <double>(arg)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_complex64:
       ptr = PyMem_Malloc(sizeof(cpp_single_complex))
       (<cpp_single_complex*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   elif arg_type is numpy_complex128:
       ptr = PyMem_Malloc(sizeof(cpp_double_complex))
       (<cpp_double_complex*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
       data_addresses[idx] = ptr
       data[idx] = ptr
       return 0
   else:
       # Backward compatibility: fallback to isinstance for subclasses
       if isinstance(arg, numpy_bool):
           ptr = PyMem_Malloc(sizeof(cpp_bool))
           (<cpp_bool*>ptr)[0] = <cpp_bool>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_int8):
           ptr = PyMem_Malloc(sizeof(int8_t))
           (<int8_t*>ptr)[0] = <int8_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_int16):
           ptr = PyMem_Malloc(sizeof(int16_t))
           (<int16_t*>ptr)[0] = <int16_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_int32):
           ptr = PyMem_Malloc(sizeof(int32_t))
           (<int32_t*>ptr)[0] = <int32_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_int64):
           ptr = PyMem_Malloc(sizeof(int64_t))
           (<int64_t*>ptr)[0] = <int64_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_uint8):
           ptr = PyMem_Malloc(sizeof(uint8_t))
           (<uint8_t*>ptr)[0] = <uint8_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_uint16):
           ptr = PyMem_Malloc(sizeof(uint16_t))
           (<uint16_t*>ptr)[0] = <uint16_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_uint32):
           ptr = PyMem_Malloc(sizeof(uint32_t))
           (<uint32_t*>ptr)[0] = <uint32_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_uint64):
           ptr = PyMem_Malloc(sizeof(uint64_t))
           (<uint64_t*>ptr)[0] = <uint64_t>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_float32):
           ptr = PyMem_Malloc(sizeof(float))
           (<float*>ptr)[0] = <float>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_float64):
           ptr = PyMem_Malloc(sizeof(double))
           (<double*>ptr)[0] = <double>(arg)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_complex64):
           ptr = PyMem_Malloc(sizeof(cpp_single_complex))
           (<cpp_single_complex*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       elif isinstance(arg, numpy_complex128):
           ptr = PyMem_Malloc(sizeof(cpp_double_complex))
           (<cpp_double_complex*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
           data_addresses[idx] = ptr
           data[idx] = ptr
           return 0
       else:
           return 1


# ============================================================================
# Benchmark functions
# ============================================================================

def benchmark_isinstance(kernel_args, int iterations):
   """Benchmark the isinstance() approach."""
   cdef size_t n_args = len(kernel_args)
   cdef size_t i, j
   cdef int not_prepared
   cdef vector[voidptr] data
   cdef vector[voidptr] data_addresses
   cdef double start, end

   # Warmup
   for _ in range(100):
       data = vector[voidptr](n_args, NULL)
       data_addresses = vector[voidptr](n_args)
       for i, arg in enumerate(kernel_args):
           if isinstance(arg, int):
               continue
           elif isinstance(arg, float):
               continue
           elif isinstance(arg, complex):
               continue
           elif isinstance(arg, bool):
               continue

           not_prepared = prepare_numpy_arg_isinstance(data, data_addresses, arg, i)
           if not_prepared:
               not_prepared = prepare_ctypes_arg_isinstance(data, data_addresses, arg, i)

       for data_ptr in data:
           if data_ptr:
               PyMem_Free(data_ptr)

   # Actual benchmark
   start = time.perf_counter()
   for j in range(iterations):
       data = vector[voidptr](n_args, NULL)
       data_addresses = vector[voidptr](n_args)

       for i, arg in enumerate(kernel_args):
           if isinstance(arg, int):
               continue
           elif isinstance(arg, float):
               continue
           elif isinstance(arg, complex):
               continue
           elif isinstance(arg, bool):
               continue

           not_prepared = prepare_numpy_arg_isinstance(data, data_addresses, arg, i)
           if not_prepared:
               not_prepared = prepare_ctypes_arg_isinstance(data, data_addresses, arg, i)

       for data_ptr in data:
           if data_ptr:
               PyMem_Free(data_ptr)

   end = time.perf_counter()
   return end - start


def benchmark_type_is(kernel_args, int iterations):
   """Benchmark the type() is approach with isinstance fallback."""
   cdef size_t n_args = len(kernel_args)
   cdef size_t i, j
   cdef int not_prepared
   cdef vector[voidptr] data
   cdef vector[voidptr] data_addresses
   cdef double start, end
   cdef object arg_type

   # Warmup
   for _ in range(100):
       data = vector[voidptr](n_args, NULL)
       data_addresses = vector[voidptr](n_args)
       for i, arg in enumerate(kernel_args):
           arg_type = type(arg)
           if arg_type is int:
               continue
           elif arg_type is float:
               continue
           elif arg_type is complex:
               continue
           elif arg_type is bool:
               continue

           not_prepared = prepare_numpy_arg_type_is(data, data_addresses, arg, i)
           if not_prepared:
               not_prepared = prepare_ctypes_arg_type_is(data, data_addresses, arg, i)

       for data_ptr in data:
           if data_ptr:
               PyMem_Free(data_ptr)

   # Actual benchmark
   start = time.perf_counter()
   for j in range(iterations):
       data = vector[voidptr](n_args, NULL)
       data_addresses = vector[voidptr](n_args)

       for i, arg in enumerate(kernel_args):
           arg_type = type(arg)
           if arg_type is int:
               continue
           elif arg_type is float:
               continue
           elif arg_type is complex:
               continue
           elif arg_type is bool:
               continue

           not_prepared = prepare_numpy_arg_type_is(data, data_addresses, arg, i)
           if not_prepared:
               not_prepared = prepare_ctypes_arg_type_is(data, data_addresses, arg, i)

       for data_ptr in data:
           if data_ptr:
               PyMem_Free(data_ptr)

   end = time.perf_counter()
   return end - start


def run_benchmark():
   """Main benchmark runner."""
   print("=" * 70)
   print("Cython Benchmark: isinstance() vs type() is + isinstance fallback")
   print("Kernel Argument Handling Hot Path")
   print("=" * 70)
   print()

   # Create realistic kernel arguments
   kernel_args = [
       numpy.int32(100),
       numpy.float32(2.5),
       numpy.float64(1.23),
       numpy.complex64(1+1j),
       numpy.int64(999),
       numpy.uint32(255),
       ctypes.c_int32(50),
       ctypes.c_float(1.5),
       ctypes.c_double(2.7),
       numpy.int8(10),
       numpy.int16(20),
       numpy.uint8(5),
       numpy.uint16(30),
       numpy.float32(0.5),
   ]

   iterations = 50000
   num_runs = 10

   print(f"Configuration:")
   print(f"  - Arguments per launch: {len(kernel_args)}")
   print(f"  - Simulated launches per run: {iterations:,}")
   print(f"  - Number of runs: {num_runs}")
   print(f"  - Total argument processing: {len(kernel_args) * iterations * num_runs:,}")
   print()

   print("Running isinstance() benchmark...")
   isinstance_times = []
   for i in range(num_runs):
       t = benchmark_isinstance(kernel_args, iterations)
       isinstance_times.append(t)
       print(f"  Run {i+1}: {t:.4f}s")
   isinstance_mean = mean(isinstance_times)
   isinstance_stdev = stdev(isinstance_times)
   print(f"  Mean: {isinstance_mean:.4f}s ± {isinstance_stdev:.6f}s")
   print()

   print("Running type() is + isinstance fallback benchmark...")
   type_is_times = []
   for i in range(num_runs):
       t = benchmark_type_is(kernel_args, iterations)
       type_is_times.append(t)
       print(f"  Run {i+1}: {t:.4f}s")
   type_is_mean = mean(type_is_times)
   type_is_stdev = stdev(type_is_times)
   print(f"  Mean: {type_is_mean:.4f}s ± {type_is_stdev:.6f}s")
   print()

   print("=" * 70)
   print("RESULTS")
   print("=" * 70)
   print(f"isinstance():                 {isinstance_mean:.4f}s ± {isinstance_stdev:.6f}s")
   print(f"type() is + isinstance fallback: {type_is_mean:.4f}s ± {type_is_stdev:.6f}s")
   print()

   speedup = isinstance_mean / type_is_mean
   time_saved = isinstance_mean - type_is_mean
   percent_faster = (speedup - 1) * 100

   if speedup > 1.02:
       print(f"✓ type() is is {speedup:.2f}x FASTER ({percent_faster:.1f}% improvement)")
       print(f"  Time saved per 1M launches: {time_saved / (iterations * num_runs) * 1e6 * 1000:.2f}ms")
       print()
       print("RECOMMENDATION: Replace isinstance() with type() is")
   elif speedup < 0.98:
       print(f"⚠️  isinstance() is {1/speedup:.2f}x FASTER")
       print()
       print("RECOMMENDATION: Keep using isinstance()")
   else:
       print(f"≈ Performance is similar (difference < 2%)")
       print()
       print("RECOMMENDATION: Keep using isinstance() for clarity")
   print()

   # Per-launch cost
   total_launches = iterations * num_runs
   print(f"Per-launch argument processing cost:")
   print(f"  isinstance(): {isinstance_mean / total_launches * 1e6:.2f} µs")
   print(f"  type() is:    {type_is_mean / total_launches * 1e6:.2f} µs")
   print()

I compiled with the -O3 and -march=native flags and built the benchmark above via this setup script, setup_benchmark.py:

#!/usr/bin/env python3
"""
Setup script for building the Cython benchmark extension.
"""

from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy

extensions = [
   Extension(
       "benchmark_isinstance_cython",
       ["benchmark_isinstance_cython.pyx"],
       include_dirs=[numpy.get_include()],
       extra_compile_args=["-O3", "-march=native"],
       language="c++",
   )
]

setup(
   name="benchmark_isinstance_cython",
   ext_modules=cythonize(
       extensions,
       compiler_directives={
           'language_level': 3,
           'boundscheck': False,
           'wraparound': False,
           'cdivision': True,
       }
   ),
)

The extension was then built with python setup_benchmark.py build_ext --inplace.
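
Once the extension is built in the current directory, the benchmark can be driven with a tiny script like the following (a sketch; it just calls the run_benchmark() entry point defined in the .pyx above):

# run_benchmark.py - minimal driver for the compiled benchmark extension.
# Assumes `python setup_benchmark.py build_ext --inplace` has already been run
# so that benchmark_isinstance_cython is importable from the current directory.
import benchmark_isinstance_cython as bench

bench.run_benchmark()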

UPDATE: I updated the benchmark script to include the isinstance() fallback, and it still shows an ~3x speedup.

Checklist

  • New or existing tests cover these changes.
  • The documentation is up to date with these changes.

@copy-pr-bot
Contributor

copy-pr-bot bot commented Dec 1, 2025

This pull request requires additional validation before any workflows can run on NVIDIA's runners.

Pull request vetters can view their responsibilities here.

Contributors can view more details about this message here.

@bharatr21
Contributor Author

/ok to test

@leofang leofang requested a review from mdboom December 1, 2025 15:49
@leofang leofang added enhancement Any code-related improvements P1 Medium priority - Should do cuda.core Everything related to the cuda.core module labels Dec 1, 2025
@leofang leofang added this to the cuda.core beta 10 milestone Dec 1, 2025
@NVIDIA NVIDIA deleted a comment from copy-pr-bot bot Dec 2, 2025
@mdboom
Contributor

mdboom commented Dec 2, 2025

/ok to test 0db38d0

@github-actions

github-actions bot commented Dec 2, 2025

Contributor

@mdboom mdboom left a comment


This is great work. I downloaded your script and was able to reproduce a similar result (5.23x faster) on my laptop. The math looks sound. Just to make sure, I replaced your manual calculations with Python's built-in timeit.timeit and pyperf (the latter being the sort of "gold standard" for accurate perf timings in Python). But the results are all roughly the same, and 5x is large enough that that level of accuracy doesn't really matter -- it's an obvious big win.
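
For reference, a pyperf cross-check along those lines might look roughly like the sketch below. This is not the exact script I ran; the module and function names are taken from the benchmark in the PR description, and the argument list is abbreviated.

# bench_pyperf.py - hypothetical pyperf cross-check of the compiled benchmark.
import ctypes

import numpy
import pyperf

import benchmark_isinstance_cython as bench

kernel_args = [
    numpy.int32(100), numpy.float32(2.5), numpy.float64(1.23),
    numpy.complex64(1 + 1j), ctypes.c_int32(50), ctypes.c_double(2.7),
]

runner = pyperf.Runner()
# Each measured value is one simulated launch worth of argument processing.
runner.bench_func("isinstance()", bench.benchmark_isinstance, kernel_args, 1)
runner.bench_func("type() is", bench.benchmark_type_is, kernel_args, 1)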

My only concern with this PR is backward compatibility. It is technically possible to subclass either a numpy or ctypes datatype right now and it would be accepted and work here with the isinstance check but would no longer be accepted after this change. I don't know how often that actually happens in practice, and our test suite obviously doesn't do that. I'm not sure how to assess how much we care about this -- it seems hard to do a GitHub code search for, for example. @leofang, thoughts?

If we determine we do want to be strict about backward compatibility, we could probably do:

if arg_type is ctypes_bool:
   ...
elif ...
   ...
else:
    # If no exact types are found, fallback to slower `isinstance` check
    if isinstance(arg, ctypes_bool):
        ...
    elif ...:
        ...
    else:
        return 1

Note that the fallback cases are in a separate if/elif/else block so that Cython can still optimize the outer one to a C switch statement.

I suspect that would not have a significant impact on the benchmark (which doesn't exercise subclasses). If we go this route, we should also add a test that creates a subclass of a ctype and numpy type and confirms that it works and does the right thing.
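
Such a test could be shaped roughly like this. To be clear, the kernel and fixture names here (scalar_echo_kernel, launch_config) are placeholders made up for the sketch, not part of the existing test suite; the point is only that subclass instances must still be accepted as kernel arguments once the fast path is type()-based.

import ctypes

import numpy as np
import pytest


class MyCInt32(ctypes.c_int32):
    pass


class MyFloat64(np.float64):
    pass


@pytest.mark.parametrize("arg, expected", [(MyCInt32(42), 42), (MyFloat64(2.5), 2.5)])
def test_launch_accepts_scalar_subclasses(arg, expected, scalar_echo_kernel, launch_config):
    # scalar_echo_kernel is assumed to copy its single scalar argument into a
    # one-element output buffer and return the host-side value.
    out = scalar_echo_kernel.launch_and_read(launch_config, arg)
    assert out == expected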

@kkraus14
Collaborator

kkraus14 commented Dec 2, 2025

My only concern with this PR is backward compatibility. It is technically possible to subclass either a numpy or ctypes datatype right now and it would be accepted and work here with the isinstance check but would no longer be accepted after this change. I don't know how often that actually happens in practice, and our test suite obviously doesn't do that. I'm not sure how to assess how much we care about this -- it seems hard to do a GitHub code search for, for example. @leofang, thoughts?

If we determine we do want to be strict about backward compatibility, we could probably do:

if arg_type is ctypes_bool:
   ...
elif ...
   ...
else:
    # If no exact types are found, fallback to slower `isinstance` check
    if isinstance(arg, ctypes_bool):
        ...
    elif ...:
        ...
    else:
        return 1

Note that the fallback cases are in a separate if/elif/else block so that Cython can still optimize the outer one to a C switch statement.

Outside of backward compatibility, I think it's reasonable to continue supporting subclasses but to fast path the most common types that we expect users to use, so I'm in support of taking this approach.

@bharatr21
Contributor Author

bharatr21 commented Dec 2, 2025

My only concern with this PR is backward compatibility. It is technically possible to subclass either a numpy or ctypes datatype right now and it would be accepted and work here with the isinstance check but would no longer be accepted after this change. I don't know how often that actually happens in practice, and our test suite obviously doesn't do that. I'm not sure how to assess how much we care about this -- it seems hard to do a GitHub code search for, for example. @leofang, thoughts?
If we determine we do want to be strict about backward compatibility, we could probably do:

if arg_type is ctypes_bool:
   ...
elif ...
   ...
else:
    # If no exact types are found, fallback to slower `isinstance` check
    if isinstance(arg, ctypes_bool):
        ...
    elif ...:
        ...
    else:
        return 1

Note that the fallback cases are in a separate if/elif/else block so that Cython can still optimize the outer one to a C switch statement.

Outside of backward compatibility, I think it's reasonable to continue supporting subclasses but to fast path the most common types that we expect users to use, so I'm in support of taking this approach.

To clarify my understanding, would it be enough to do:

if arg_type is ctypes_bool:
   ...
elif isinstance(arg, ctypes_bool):
   ...
else:
     return 1

or should the check be done manually for all possible subclasses of type T, like

if arg_type is T:
   ...
elif arg_type is subclass_1:
   ...
elif arg_type is subclass_2:
   ...
elif isinstance(arg, T):
   ...
else:
     return 1

@bharatr21
Contributor Author

bharatr21 commented Dec 2, 2025

/ok to test 0fd52b4

@kkraus14
Collaborator

kkraus14 commented Dec 2, 2025

To clarify my understanding, would it be enough to do:

if arg_type is ctypes_bool:
   ...
elif isinstance(arg, ctypes_bool):
   ...
else:
     return 1

or should the check be done manually for all possible subclasses of type T, like

if arg_type is T:
   ...
elif arg_type is subclass_1:
   ...
elif arg_type is subclass_2:
   ...
elif isinstance(arg, T):
   ...
else:
     return 1

The subclasses would likely be user-created/controlled, so we can't possibly cover them all; we should go with the first option. If we find there's some kind of common subclass, then it's something we could explore fast-pathing in the future.

@leofang
Member

leofang commented Dec 2, 2025

I am not too worried about backward compatibility in this case, because it is rare in practice that someone would e.g. subclass np.int32. I have never seen such usage. I am fine merging this as-is.

If we want to keep bc, Mike's suggestion on shuffling all existing isinstance checks to the very end of the big if-else block makes perfect sense. Do not interleave is and isinstance for each single type.

@bharatr21
Contributor Author

bharatr21 commented Dec 2, 2025

I am not too worried about backward compatibility in this case, because it is rare in practice that someone would e.g. subclass np.int32. I have never seen such usage. I am fine merging this as-is.

If we want to keep bc, Mike's suggestion on shuffling all existing isinstance checks to the very end of the big if-else block makes perfect sense. Do not interleave is and isinstance for each single type.

So basically retaining this structure, but adding all the isinstance checks after the last elif block? Like:

if arg_type is T1:
 ...
elif arg_type is T2:
 ...
elif arg_type is T3:
 ...
else:
    if isinstance(arg, T1):
      ...
    elif isinstance(arg, T2):
      ...
    elif isinstance(arg, T3):
      ...
    else:
       return 1

@leofang
Member

leofang commented Dec 2, 2025

Yes, this was what Mike suggested 🙂

@bharatr21
Contributor Author

bharatr21 commented Dec 2, 2025

/ok to test 0b9d8ad

@bharatr21 bharatr21 requested a review from mdboom December 2, 2025 23:21
@mdboom
Contributor

mdboom commented Dec 3, 2025

/ok to test 5e09e0c

Contributor

@mdboom mdboom left a comment


This looks great. Thanks again. Let's merge this once the tests have passed.

Contributor

@mdboom mdboom left a comment


Looks like there are 2 test failures here that will need to be addressed. I'm not exactly sure why they are failing, but let me know if I can help once you've had a chance to look.

Signed-off-by: Bharat Raghunathan <[email protected]>
Member

@leofang leofang left a comment


I don't think commit ed81c11 is correct. The bool treatment should not be changed. @bharatr21 have you been able to run the test suite locally and confirm everything passes before pushing?

@bharatr21
Contributor Author

I don't think commit ed81c11 is correct. The bool treatment should not be changed. @bharatr21 have you been able to run the test suite locally and confirm everything passes before pushing?

It looks like this kernel logic was also treating the Python primitive bool as 4 bytes, unlike numpy_bool or ctypes_bool, which is why this change is necessary. I don't have CUDA hardware locally, so I was unable to run the tests; I'm trying to set up a cloud GPU environment so I can run them now.

@leofang
Member

leofang commented Dec 4, 2025

Let us know if you have trouble setting up an environment!

I tried bisecting locally and it seems the test failures in test_graph.py started from the first commit 874b7f4, but it is not obvious to me what could go wrong. Either Mike or I can take a look tomorrow if you haven't found the root cause by then already 🙂

@bharatr21
Contributor Author

bharatr21 commented Dec 4, 2025

Let us know if you have trouble setting up an environment!

I tried bisecting locally and it seems the test failures in test_graph.py started from the first commit 874b7f4, but it is not obvious to me what could go wrong. Either Mike or I can take a look tomorrow if you haven't found the root cause by then already 🙂

May I get some guidance on how to set up the environment on a system without GPUs?
Or does that defeat the purpose 😅

I'm also slightly surprised/confused, as all CI checks here seem to have passed since ed81c11

@mdboom
Contributor

mdboom commented Dec 4, 2025

The old set of checks was like this:

            if isinstance(arg, Buffer):
                ...
            elif isinstance(arg, int):
                ...
            elif isinstance(arg, float):
                ...
            elif isinstance(arg, complex):
                ...
            elif isinstance(arg, bool):
                ...

Since isinstance(True, int) == True (surprising, but true!), it is impossible to hit the last branch here (it will always hit isinstance(arg, int) first), and the code in the bool branch was probably never tested.
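
A quick illustration of that corner of the type system:

# bool is a subclass of int in Python, so an isinstance(arg, int) branch placed
# before the bool branch swallows True/False; an exact type check does not.
assert issubclass(bool, int)
assert isinstance(True, int)   # the int branch always wins first
assert type(True) is bool      # the exact check still distinguishes them
assert type(True) is not int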

This PR moves this to:

            if arg_type is Buffer:
                ...
            elif arg_type is bool:
                ...
            elif arg_type is int:
                ...
            elif arg_type is float:
                ...
            elif arg_type is complex:
                ...

The ordering has changed, but ordering should also no longer matter, because these are now exact type checks.

When Cython converts a Python bool to C, it is defined as being an int here: "The special bint type is used for C boolean values (int with 0/non-0 values for False/True)"

So, I think @bharatr21's change here is correct. I played a bit with specifying it this way:

prepare_arg[bint](self.data, self.data_addresses, arg, i)

but this doesn't work. bint is sort of a "special" type in Cython and doesn't behave like the other integer typedefs. Likewise, bint is defined as "whatever C int is" (which may not always be 32 bits, even though it is on today's mainstream platforms). I tried:

prepare_arg[int](self.data, self.data_addresses, arg, i)

but that also doesn't work. I can't find a way to specify "C int" from Cython. However, uint32_t is defined as ctypedef unsigned int uint32_t by Cython, so what @bharatr21 has here is probably the best we can do:

prepare_arg[uint32_t](self.data, self.data_addresses, int(arg), i)

The only small tweak to make is that the explicit Python cast from a bool to an int seems unnecessary.

Contributor

@mdboom mdboom left a comment


LGTM (see my longer explanation). Just need to remove the unnecessary casts.

@mdboom
Contributor

mdboom commented Dec 4, 2025

May I get some guidance on how to setup the environment on a system without GPUs?
Or does that defeat the purpose 😅

Yeah, I think unfortunately we can't really test this without a GPU. There may be other suggestions, but you can get 30 days/$200 free credits on Azure to at least get started. Not a long term solution, though.

@leofang
Member

leofang commented Dec 4, 2025

Since isinstance(True, int) == True (surprising, but true!), it is impossible to hit the last branch here (it will always hit isinstance(arg, int) first), and the code in the bool branch was probably never tested.

Ah! I was playing around with this last night, but for some reason I missed checking this combination. Good catch, Mike!

The only small tweak to make is that it seems that the explicit Python cast from a bool to an int is not necessary.

That's right. The prepare_arg helper is designed to do the type casting correctly. IIRC in order to prepare for kernel launch we already have to cast each arg from Python to C++ and we should not add another cast.

bharatr21 and others added 2 commits December 4, 2025 10:27
Explicit cast not needed since `prepare_arg` does it automatically

Co-authored-by: Leo Fang <[email protected]>
@bharatr21 bharatr21 requested review from leofang and mdboom December 4, 2025 16:31
@mdboom
Contributor

mdboom commented Dec 4, 2025

/ok to test 0fc1ed8

@mdboom
Contributor

mdboom commented Dec 4, 2025

I've added commit 0fc1ed8 here to fix the failing test (so it expects a bool) and added a changelog entry (since that's technically a breaking change). It remains a mystery to me why this test, which passes bool values to a kernel, didn't break.

(bool, "bool", True),
Maybe there is something different about the argument passing to a set_handle function vs. an actual kernel.

@mdboom
Contributor

mdboom commented Dec 4, 2025

Maybe there is something different about the argument passing to a set_handle function vs. an actual kernel.

I figured out why this is the case.

On main:

  • test_graph_conditional_if was passing a 4-byte bool to a 4-byte int. This works because they match.
  • test_launch_scalar_argument was passing a 4-byte bool to a 1-byte bool. This works because the first byte of a little-endian 4-byte bool will always have the same value as a 1-byte bool

On this PR (without fixing the single test):

  • test_graph_conditional_if was passing a 1-byte bool to a 4-byte int. If there is any non-zero garbage in the 3 bytes following the 1-byte passed in, a False could read as True.
  • test_launch_scalar_argument was passing a 1-byte bool to a 1-byte bool. This works because they match.
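
The sizes behind that explanation can be checked from Python directly (a small sketch; the C int width is platform-defined but 4 bytes on today's mainstream targets, and the last check assumes a little-endian machine):

import ctypes
import numpy as np

assert ctypes.sizeof(ctypes.c_bool) == 1   # 1-byte C/C++ bool
assert ctypes.sizeof(ctypes.c_int) == 4    # 4-byte int on mainstream platforms
assert np.bool_(True).nbytes == 1          # numpy bool is also 1 byte
assert np.int32(0).nbytes == 4

# On little-endian hardware, the first byte of a 4-byte "bool" equals the
# 1-byte bool, which is why test_launch_scalar_argument happened to pass on main.
assert np.int32(1).tobytes()[:1] == np.bool_(True).tobytes()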

  # TODO: revisit this treatment if we decide to cythonize cuda.core
- if isinstance(arg, driver.CUgraphConditionalHandle):
+ if arg_type is driver.CUgraphConditionalHandle:
      prepare_arg[intptr_t](self.data, self.data_addresses, <intptr_t>int(arg), i)
Member


Looks like in the header it's a typedef of unsigned long long; let's be explicit:

Suggested change
- prepare_arg[intptr_t](self.data, self.data_addresses, <intptr_t>int(arg), i)
+ prepare_arg[unsigned long long](self.data, self.data_addresses, arg, i)

alternatively, I think this should work too

Suggested change
- prepare_arg[intptr_t](self.data, self.data_addresses, <intptr_t>int(arg), i)
+ prepare_arg[cydriver.CUgraphConditionalHandle](self.data, self.data_addresses, arg, i)

Contributor


The first option doesn't work because the Cython parser can't handle unsigned long long in that place. It would have to be some sort of typedef to unsigned long long. But I like the second option a lot better anyway -- seems more explicit about what it needs to match.

Member


The first option doesn't work because the Cython parser can't handle unsigned long long in that place.

Ah yes. Need to add it to the list under the supported_type fused type. But I agree the 2nd option is better.

@leofang
Member

leofang commented Dec 5, 2025

Thanks, @mdboom @bharatr21. Looks like this PR is now both an enhancement (better perf) and bug fix (exposing two type casting bugs) 😄

@mdboom
Contributor

mdboom commented Dec 5, 2025

Thanks, @mdboom @bharatr21. Looks like this PR is now both an enhancement (better perf) and bug fix (exposing two type casting bugs) 😄

My last commit improves the tests to test creating conditional handles in both ways: int and bool. The types being sent need to match the type specified in the argument to set_handle, so the kernel is generated both ways depending on the input type.

I think to strictly follow what upstream cudaGraphConditionalHandle does, set_handle should take an unsigned int, and then if the user wants to pass a bool they would need to cast it to an int like so:

condition_value = True
...
launch(gb, LaunchConfig(grid=1, block=1), set_handle, handle, int(condition_value))

...and this is now being tested.

@mdboom mdboom requested a review from leofang December 5, 2025 14:22
@mdboom
Contributor

mdboom commented Dec 5, 2025

/ok to test 48a0856

return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)
else:
return 1
# If no exact types are found, fallback to slower `isinstance` check
Member


Note: this block might add a penalty to ctypes and graph conditional handle dispatch, since they are tried after NumPy. I don't know how much this costs, though.

Comment on lines +19 to +21
- Python ``bool`` objects are now converted to C++ ``bool`` type when passed as kernel
arguments. Previously, they were converted to ``int``. This brings them inline
with ``ctypes.c_bool`` and ``numpy.bool_``.
Member


Q: I think this no longer applies? Should we just add another entry to mention the perf improvement?

@leofang
Copy link
Member

leofang commented Dec 6, 2025

/ok to test d9a7c14


Labels

cuda.core Everything related to the cuda.core module enhancement Any code-related improvements P1 Medium priority - Should do


Development

Successfully merging this pull request may close these issues.

Replace isinstance checks by type == in ParamHolder

5 participants